{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "e1a6720d",
   "metadata": {},
   "source": [
    "https://sz.lianjia.com/zufang/rmp5rt200600000001l0ra0erp1500/?showMore=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "174f863e",
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Title: 合租·康桥紫郡 5居室 北卧\t House Info: 龙岗区-丹竹头-康桥紫郡/12.00㎡/北/5室1厅2卫/高楼层（18层）\t Tags: 近地铁随时看房\t Brand: 6天前维护\t Price: 1500 元/月\n",
      "\n",
      "Title: 合租·名门世家 5居室 南卧\t House Info: 龙岗区-横岗-名门世家/14.00㎡/南/5室2厅2卫/低楼层（15层）\t Tags: 近地铁\t Brand: 8天前维护\t Price: 1480 元/月\n",
      "\n",
      "Title: 独栋·安居乐寓 福永六店（机场东） 【押一付一】网红INS大单间！可短租 ！开业特惠 直达南山 开间\t House Info: 仅剩2间/19.00㎡/2间在租/1室0厅1卫\t Tags: 独栋公寓月租拎包入住近地铁精装开放厨房押一付一\t Brand: 安居乐寓2天前维护\t Price: 1480-1500 元/月\n",
      "\n",
      "Title: 合租·福园花园 7居室 东南卧\t House Info: 南山区-南山中心-福园花园/15.00㎡/东南/7室1厅7卫/高楼层（7层）\t Tags: 近地铁\t Brand: 链家1天前维护\t Price: 1500 元/月\n",
      "\n",
      "Title: 独栋·安居乐寓 福永六店（机场东） 【民水民电】 商务首选 直达前海CBD 采光大单间 可短租 开间\t House Info: 仅剩1间/22.00㎡/1间在租/1室0厅1卫\t Tags: 独栋公寓月租拎包入住近地铁精装开放厨房押一付一\t Brand: 安居乐寓2天前维护\t Price: 1480 元/月\n",
      "\n",
      "Title: 合租·宝城花园 6居室 西北卧\t House Info: 宝安区-西乡-宝城花园/15.00㎡/西北/6室1厅3卫/中楼层（8层）\t Tags: \t Brand: 链家12天前维护\t Price: 1500 元/月\n",
      "\n",
      "Title: 整租·龙光玖云著 2室1厅 南\t House Info: 坪山区-坪山-龙光玖云著/37.00㎡/南/2室1厅1卫/低楼层（22层）\t Tags: 近地铁\t Brand: 3天前维护\t Price: 1300 元/月\n",
      "\n",
      "Title: 独栋·安居乐寓 福永六店（机场东） 【可短租】超大明窗 采光好 拎包入住 民水民电 毕业特惠 开间\t House Info: 仅剩1间/22.00㎡/1间在租/1室0厅1卫\t Tags: 独栋公寓月租拎包入住近地铁精装开放厨房押一付一\t Brand: 安居乐寓2天前维护\t Price: 1480 元/月\n",
      "\n",
      "Title: 合租·宝城花园 7居室 西南卧\t House Info: 宝安区-西乡-宝城花园/12.00㎡/西南/7室0厅7卫/高楼层（8层）\t Tags: 近地铁\t Brand: 链家1天前维护\t Price: 1300 元/月\n",
      "\n",
      "Title: 独栋·星窝青年公寓 共和店二期 采光充足精装单间 家电齐全 押一付一（不短租） 1室1厅\t House Info: 仅剩1间/20.00㎡/1间在租/1室1厅1卫\t Tags: 独栋公寓拎包入住精装有阳台开放厨房押一付一\t Brand: 星窝青年公寓1天前维护\t Price: 1450 元/月\n",
      "\n",
      "Title: 合租·金祥都市花园 4居室 东南卧\t House Info: 罗湖区-清水河-金祥都市花园/15.00㎡/东南/4室2厅1卫/高楼层（8层）\t Tags: 近地铁\t Brand: 链家14天前维护\t Price: 1300 元/月\n",
      "\n",
      "Title: 合租·英郡年华花园二期 5居室 南卧\t House Info: 龙岗区-丹竹头-英郡年华花园二期/14.00㎡/南/5室1厅2卫/低楼层（31层）\t Tags: 近地铁\t Brand: 链家5天前维护\t Price: 1500 元/月\n",
      "\n",
      "Title: 独栋·星窝青年公寓 壹城中心店二期 龙华壹方天地采光好拎包入住 开间\t House Info: 仅剩2间/23.80㎡/2间在租/1室0厅1卫\t Tags: 独栋公寓精装开放厨房押一付一\t Brand: 星窝青年公寓3天前维护\t Price: 1199 元/月\n",
      "\n",
      "Title: 合租·阳光天健城 7居室 南卧\t House Info: 龙岗区-大运新城-阳光天健城/15.00㎡/南/7室2厅4卫/高楼层（27层）\t Tags: 近地铁\t Brand: 4天前维护\t Price: 1100 元/月\n",
      "\n",
      "Title: 独栋·招商蛇口品牌公寓 壹间▪坂田嘉御山（东区11栋） 短租不加价  拎包入住  押一付一（短租三个月起） 5室1厅\t House Info: 仅剩1间/10.43㎡/1间在租/5室1厅3卫\t Tags: 独栋公寓近地铁精装有阳台开放厨房押一付一\t Brand: 招商蛇口品牌公寓2天前维护\t Price: 1480 元/月\n",
      "\n",
      "Title: 合租·龙塘小区 3居室 东南卧\t House Info: 福田区-景田-龙塘小区/20.00㎡/东南/3室2厅2卫/高楼层（7层）\t Tags: \t Brand: 7天前维护\t Price: 1500 元/月\n",
      "\n",
      "Title: 独栋·星窝青年公寓 梅龙店A区 精装复式小单间 高性价比押一月付（不短租） 1室1厅\t House Info: 仅剩1间/15.00㎡/1间在租/1室1厅1卫\t Tags: 独栋公寓精装有阳台开放厨房\t Brand: 星窝青年公寓1天前维护\t Price: 1350 元/月\n",
      "\n",
      "Title: 合租·满京华喜悦里 6居室 东南/南卧\t House Info: 龙岗区-龙岗双龙-满京华喜悦里/28.00㎡/东南南/6室2厅3卫/中楼层（31层）\t Tags: 近地铁随时看房\t Brand: 13天前维护\t Price: 1500 元/月\n",
      "\n",
      "Title: 独栋·星窝青年公寓 水尾店 地铁口周边最便宜大单间 开间\t House Info: 仅剩3间/25.00㎡/3间在租/1室0厅1卫\t Tags: 独栋公寓近地铁精装有阳台开放厨房押一付一\t Brand: 星窝青年公寓16天前维护\t Price: 1499-1599 元/月\n",
      "\n",
      "Title: 合租·福园花园 7居室 东南卧\t House Info: 南山区-南山中心-福园花园/15.00㎡/东南/7室1厅7卫/高楼层（7层）\t Tags: 近地铁\t Brand: 链家1天前维护\t Price: 1500 元/月\n",
      "\n",
      "Title: 独栋·星窝青年公寓 壹城中心店 龙华区特惠小单间家电齐全押一付一（不短租） 1室1厅\t House Info: 仅剩1间/20.00㎡/1间在租/1室1厅1卫\t Tags: 独栋公寓拎包入住精装有阳台开放厨房押一付一\t Brand: 星窝青年公寓2天前维护\t Price: 1350 元/月\n",
      "\n",
      "Title: 合租·福园花园 7居室 东南卧\t House Info: 南山区-南山中心-福园花园/15.00㎡/东南/7室1厅7卫/高楼层（7层）\t Tags: 近地铁\t Brand: 链家1天前维护\t Price: 1500 元/月\n",
      "\n",
      "Title: 独栋·泊寓 龙华清湖公社 电梯单间 近龙华富士康 宝能科技园 4号线直达市区 长租优惠 开间\t House Info: 仅剩5间/20.07-23.92㎡/5间在租/1室0厅1卫\t Tags: 独栋公寓月租近地铁有阳台押一付一\t Brand: 泊寓8天前维护\t Price: 1099-1498 元/月\n",
      "\n",
      "Title: 合租·兰丽花园 4居室 南卧\t House Info: 南山区-南头-兰丽花园/13.00㎡/南/4室0厅3卫/高楼层（8层）\t Tags: 近地铁精装\t Brand: 13天前维护\t Price: 1500 元/月\n",
      "\n",
      "Title: 独栋·泊寓 龙华清湖公社 签约一年免租30天 近龙华富士康 宝能科技园 天安云谷 开间\t House Info: 22.02-33.36㎡/6间在租/1室0厅1卫\t Tags: 独栋公寓月租近地铁有阳台押一付一\t Brand: 泊寓16天前维护\t Price: 1119-1449 元/月\n",
      "\n",
      "Title: 独栋·星窝青年公寓 壹城中心店二期 龙华壹方天地无遮挡单间 开间\t House Info: 仅剩2间/22.00㎡/2间在租/1室0厅1卫\t Tags: 独栋公寓精装开放厨房押一付一\t Brand: 星窝青年公寓今天维护\t Price: 1399 元/月\n",
      "\n",
      "Title: 独栋·星窝青年公寓 C3店 西乡最便宜大开间 1室1厅\t House Info: 仅剩4间/21.00-25.00㎡/4间在租/1室1厅1卫\t Tags: 独栋公寓月租精装有阳台开放厨房押一付一\t Brand: 星窝青年公寓27天前维护\t Price: 1499 元/月\n",
      "\n",
      "Title: 独栋·华润置地·有巢公寓 深圳宝安机场40栋店 双地铁福永原木精装开间 开间\t House Info: 仅剩2间/12.00-19.00㎡/2间在租/1室0厅1卫\t Tags: 独栋公寓月租近地铁精装开放厨房\t Brand: 华润置地·有巢公寓9天前维护\t Price: 1007-1162 元/月\n",
      "\n",
      "Title: 独栋·深圳柠檬公寓 三一云都总店 毕业生首选 特价loft一房 民水电 开间\t House Info: 仅剩1间/25.00㎡/1间在租/1室0厅1卫\t Tags: 独栋公寓月租拎包入住精装开放厨房\t Brand: 深圳柠檬公寓今天维护\t Price: 1320 元/月\n",
      "\n",
      "Title: 独栋·华润置地·有巢公寓 深圳宝安机场52栋店 格局方正电梯开间 开间\t House Info: 仅剩1间/17.00㎡/1间在租/1室0厅1卫\t Tags: 独栋公寓近地铁精装开放厨房\t Brand: 华润置地·有巢公寓1天前维护\t Price: 1305 元/月\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "import re\n",
    "import threading\n",
    "import pandas as pd\n",
    "from lxml import etree\n",
    "# 全部信息列表\n",
    "count=list()\n",
    "\n",
    "#基础url  https://sz.lianjia.com/zufang/rmp5rt200600000001l0ra0erp1500/?showMore=1\n",
    "url = 'https://sz.lianjia.com/zufang/erp1500/?showMore=1'\n",
    "headers = {\n",
    "    'Accept': '*/*',\n",
    "    'Accept-Encoding': 'gzip, deflate, br, zstd',\n",
    "    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
    "    'Cache-Control': 'no-cache',\n",
    "    'Connection': 'keep-alive',\n",
    "    'Host': 'sz.lianjia.com',\n",
    "    'Origin':'https://sz.lianjia.com',\n",
    "    'Pragma': 'no-cache',\n",
    "    'Referer': 'https://sz.lianjia.com/',\n",
    "    'Sec-Ch-Ua': '\"Chromium\";v=\"126\", \"Microsoft Edge\";v=\"126\"',\n",
    "    'Sec-Ch-Ua-Mobile': '?0',\n",
    "    'Sec-Fetch-Dest': 'script',\n",
    "    'Sec-Fetch-Site': 'cross-site',\n",
    "    'Sec-Fetch-User': '?1',\n",
    "    'Upgrade-Insecure-Requests': '1',\n",
    "    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.0.0'}\n",
    "response=requests.get(url=url,headers=headers).text\n",
    "tree=etree.HTML(response)\n",
    "#ul列表下的全部li标签\n",
    "li_List=tree.xpath('//*[@id=\"content\"]/div[1]/div[1]/div')\n",
    "# print(li_List)\n",
    "for li in li_List:\n",
    "    # 标题\n",
    "    title = li.xpath('./div/p[1]/a/text()')[0].strip()\n",
    "    # 租房方式\n",
    "    method = title[:2]\n",
    "    # 信息\n",
    "    house_info = ''.join(li.xpath('./div/p[2]//text()')).replace(\"\\n\", \"\").replace(\" \", \"\")\n",
    "    # 标签\n",
    "    tags = ''.join(li.xpath('./div/p[3]//text()')).replace(\"\\n\", \"\").replace(\" \", \"\")\n",
    "    # 维护信息\n",
    "    brand = ''.join(li.xpath('./div/p[4]//text()')).replace(\"\\n\", \"\").replace(\" \", \"\")\n",
    "    # 价格\n",
    "    price = ''.join(li.xpath('./div/span//text()')).strip()\n",
    "\n",
    "    print(f\"Title: {title}\\t House Info: {house_info}\\t Tags: {tags}\\t Brand: {brand}\\t Price: {price}\\t Method: {method}\\n\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "5ac1c92d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Execution time: 0.96 seconds\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "import re\n",
    "import threading\n",
    "import pandas as pd\n",
    "from lxml import etree\n",
    "import time\n",
    "# 全部信息列表\n",
    "count=list()\n",
    "\n",
    "#生成1-10页url\n",
    "def url_creat():\n",
    "    #基础url  https://sz.lianjia.com/zufang/rmp5rt200600000001l0ra0erp1500/?showMore=1\n",
    "    url = 'https://sz.lianjia.com/zufang/erp1500/?showMore={}'\n",
    "    #生成前10页url列表\n",
    "    links=[url.format(i) for i in range(1,11)]\n",
    "    return links\n",
    "\n",
    "#对url进行解析\n",
    "def url_parse(url):\n",
    "    response=requests.get(url=url,headers=headers).text\n",
    "    tree=etree.HTML(response)\n",
    "    #ul列表下的全部li标签\n",
    "    li_List=tree.xpath('//*[@id=\"content\"]/div[1]/div[1]/div')\n",
    "    #创建线程锁对象\n",
    "    lock = threading.RLock()\n",
    "    #上锁\n",
    "    lock.acquire()\n",
    "    for li in li_List:\n",
    "        # 标题\n",
    "        title = li.xpath('./div/p[1]/a/text()')[0].strip()\n",
    "        # 租房方式\n",
    "        method = title[:2]\n",
    "        # 信息\n",
    "        house_info = ''.join(li.xpath('./div/p[2]//text()')).replace(\"\\n\", \"\").replace(\" \", \"\")\n",
    "        # 标签\n",
    "        tags = ''.join(li.xpath('./div/p[3]//text()')).replace(\"\\n\", \"\").replace(\" \", \"\")\n",
    "        # 维护信息\n",
    "        brand = ''.join(li.xpath('./div/p[4]//text()')).replace(\"\\n\", \"\").replace(\" \", \"\")\n",
    "        # 价格\n",
    "        price = ''.join(li.xpath('./div/span//text()')).strip()\n",
    "\n",
    "#         print(f\"Title: {title}\\t House Info: {house_info}\\t Tags: {tags}\\t Brand: {brand}\\t Price: {price}\\t Method: {method}\\n\")\n",
    "        \n",
    "        dic={'标题':title,\"房屋信息\":house_info,'标签':tags,'维护信息':brand,\"价格\":price,'租房方式':method}\n",
    "#         print(dic)\n",
    "        #将房屋信息加入总列表中\n",
    "        count.append(dic)\n",
    "    #解锁\n",
    "    lock.release()\n",
    "def run():\n",
    "    links = url_creat()\n",
    "    #多线程爬取\n",
    "    for i in links:\n",
    "        x=threading.Thread(target=url_parse,args=(i,))\n",
    "        x.start()\n",
    "    x.join()\n",
    "    #将全部房屋信息转化为excel\n",
    "    data=pd.DataFrame(count)\n",
    "    data.to_excel('深圳租房信息.xlsx',index=False)\n",
    "if __name__ == '__main__':\n",
    "    start_time = time.time()\n",
    "    run()\n",
    "    end_time = time.time()\n",
    "\n",
    "    print(f\"Execution time: {end_time - start_time:.2f} seconds\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "43333f40",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
